import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import nltk
from wordcloud import WordCloud
from wordcloud import STOPWORDS, WordCloud
from PIL import Image
import plotly.express as px
import plotly.graph_objects as go
# Read the two source tables from CSV: per-book metadata and per-review ratings.
df_data = pd.read_csv('books_data-1.csv')
df_rating = pd.read_csv('Books_rating.csv')
# Preview the first five rows of the book metadata.
df_data.head(n=5)
| Title | description | authors | image | previewLink | publisher | publishedDate | infoLink | categories | ratingsCount | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Its Only Art If Its Well Hung! | NaN | ['Julie Strain'] | http://books.google.com/books/content?id=DykPA... | http://books.google.nl/books?id=DykPAAAACAAJ&d... | NaN | 1996 | http://books.google.nl/books?id=DykPAAAACAAJ&d... | ['Comics & Graphic Novels'] | NaN |
| 1 | Dr. Seuss: American Icon | Philip Nel takes a fascinating look into the k... | ['Philip Nel'] | http://books.google.com/books/content?id=IjvHQ... | http://books.google.nl/books?id=IjvHQsCn_pgC&p... | A&C Black | 2005-01-01 | http://books.google.nl/books?id=IjvHQsCn_pgC&d... | ['Biography & Autobiography'] | NaN |
| 2 | Wonderful Worship in Smaller Churches | This resource includes twelve principles in un... | ['David R. Ray'] | http://books.google.com/books/content?id=2tsDA... | http://books.google.nl/books?id=2tsDAAAACAAJ&d... | NaN | 2000 | http://books.google.nl/books?id=2tsDAAAACAAJ&d... | ['Religion'] | NaN |
| 3 | Whispers of the Wicked Saints | Julia Thomas finds her life spinning out of co... | ['Veronica Haddon'] | http://books.google.com/books/content?id=aRSIg... | http://books.google.nl/books?id=aRSIgJlq6JwC&d... | iUniverse | 2005-02 | http://books.google.nl/books?id=aRSIgJlq6JwC&d... | ['Fiction'] | NaN |
| 4 | Nation Dance: Religion, Identity and Cultural ... | NaN | ['Edward Long'] | NaN | http://books.google.nl/books?id=399SPgAACAAJ&d... | NaN | 2003-03-01 | http://books.google.nl/books?id=399SPgAACAAJ&d... | NaN | NaN |
# Preview the first five rows of the ratings table.
df_rating.head(n=5)
| Id | Title | Price | User_id | profileName | review/helpfulness | review/score | review/time | review/summary | review/text | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1882931173 | Its Only Art If Its Well Hung! | NaN | AVCGYZL8FQQTD | Jim of Oz "jim-of-oz" | 7/7 | 4.0 | 940636800 | Nice collection of Julie Strain images | This is only for Julie Strain fans. It's a col... |
| 1 | 0826414346 | Dr. Seuss: American Icon | NaN | A30TK6U7DNS82R | Kevin Killian | 10/10 | 5.0 | 1095724800 | Really Enjoyed It | I don't care much for Dr. Seuss but after read... |
| 2 | 0826414346 | Dr. Seuss: American Icon | NaN | A3UH4UZ4RSVO82 | John Granger | 10/11 | 5.0 | 1078790400 | Essential for every personal and Public Library | If people become the books they read and if "t... |
| 3 | 0826414346 | Dr. Seuss: American Icon | NaN | A2MVUWT453QH61 | Roy E. Perry "amateur philosopher" | 7/7 | 4.0 | 1090713600 | Phlip Nel gives silly Seuss a serious treatment | Theodore Seuss Geisel (1904-1991), aka "D... |
| 4 | 0826414346 | Dr. Seuss: American Icon | NaN | A22X4XUPKF66MR | D. H. Richards "ninthwavestore" | 3/3 | 4.0 | 1107993600 | Good academic overview | Philip Nel - Dr. Seuss: American IconThis is b... |
## Books data
# Print a concise summary of the book metadata (columns, dtypes, memory usage).
df_data.info()
# Tally the missing values in each column.
df_data.isna().sum()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 212404 entries, 0 to 212403 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Title 212403 non-null object 1 description 143962 non-null object 2 authors 180991 non-null object 3 image 160329 non-null object 4 previewLink 188568 non-null object 5 publisher 136518 non-null object 6 publishedDate 187099 non-null object 7 infoLink 188568 non-null object 8 categories 171205 non-null object 9 ratingsCount 49752 non-null float64 dtypes: float64(1), object(9) memory usage: 16.2+ MB
Title 1 description 68442 authors 31413 image 52075 previewLink 23836 publisher 75886 publishedDate 25305 infoLink 23836 categories 41199 ratingsCount 162652 dtype: int64
#### Books ratings
# Print a concise summary of the ratings table (columns, dtypes, memory usage).
df_rating.info()
# Tally the missing values in each column.
df_rating.isna().sum()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3000000 entries, 0 to 2999999 Data columns (total 10 columns): # Column Dtype --- ------ ----- 0 Id object 1 Title object 2 Price float64 3 User_id object 4 profileName object 5 review/helpfulness object 6 review/score float64 7 review/time int64 8 review/summary object 9 review/text object dtypes: float64(2), int64(1), object(7) memory usage: 228.9+ MB
Id 0 Title 208 Price 2518829 User_id 561787 profileName 561886 review/helpfulness 0 review/score 0 review/time 0 review/summary 38 review/text 8 dtype: int64
# Remove, in place, the book-metadata columns considered irrelevant to the predictive model.
df_data.drop(['image', 'previewLink', 'infoLink', 'ratingsCount'], axis='columns', inplace=True)
# Confirm the remaining columns.
df_data.head(n=5)
| Title | description | authors | publisher | publishedDate | categories | |
|---|---|---|---|---|---|---|
| 0 | Its Only Art If Its Well Hung! | NaN | ['Julie Strain'] | NaN | 1996 | ['Comics & Graphic Novels'] |
| 1 | Dr. Seuss: American Icon | Philip Nel takes a fascinating look into the k... | ['Philip Nel'] | A&C Black | 2005-01-01 | ['Biography & Autobiography'] |
| 2 | Wonderful Worship in Smaller Churches | This resource includes twelve principles in un... | ['David R. Ray'] | NaN | 2000 | ['Religion'] |
| 3 | Whispers of the Wicked Saints | Julia Thomas finds her life spinning out of co... | ['Veronica Haddon'] | iUniverse | 2005-02 | ['Fiction'] |
| 4 | Nation Dance: Religion, Identity and Cultural ... | NaN | ['Edward Long'] | NaN | 2003-03-01 | NaN |
# Remove, in place, the ratings columns considered irrelevant to the predictive model.
df_rating.drop(['Id', 'User_id', 'review/helpfulness', 'profileName'], axis='columns', inplace=True)
# Confirm the remaining columns.
df_rating.head(n=5)
| Title | Price | review/score | review/time | review/summary | review/text | |
|---|---|---|---|---|---|---|
| 0 | Its Only Art If Its Well Hung! | NaN | 4.0 | 940636800 | Nice collection of Julie Strain images | This is only for Julie Strain fans. It's a col... |
| 1 | Dr. Seuss: American Icon | NaN | 5.0 | 1095724800 | Really Enjoyed It | I don't care much for Dr. Seuss but after read... |
| 2 | Dr. Seuss: American Icon | NaN | 5.0 | 1078790400 | Essential for every personal and Public Library | If people become the books they read and if "t... |
| 3 | Dr. Seuss: American Icon | NaN | 4.0 | 1090713600 | Phlip Nel gives silly Seuss a serious treatment | Theodore Seuss Geisel (1904-1991), aka "D... |
| 4 | Dr. Seuss: American Icon | NaN | 4.0 | 1107993600 | Good academic overview | Philip Nel - Dr. Seuss: American IconThis is b... |
# Left-join the book metadata onto the ratings via the 'Title' column:
# every rating row is kept and gains the columns of the matching book.
df = df_rating.merge(df_data, how='left', on='Title')
df.head(n=5)
| Title | Price | review/score | review/time | review/summary | review/text | description | authors | publisher | publishedDate | categories | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Its Only Art If Its Well Hung! | NaN | 4.0 | 940636800 | Nice collection of Julie Strain images | This is only for Julie Strain fans. It's a col... | NaN | ['Julie Strain'] | NaN | 1996 | ['Comics & Graphic Novels'] |
| 1 | Dr. Seuss: American Icon | NaN | 5.0 | 1095724800 | Really Enjoyed It | I don't care much for Dr. Seuss but after read... | Philip Nel takes a fascinating look into the k... | ['Philip Nel'] | A&C Black | 2005-01-01 | ['Biography & Autobiography'] |
| 2 | Dr. Seuss: American Icon | NaN | 5.0 | 1078790400 | Essential for every personal and Public Library | If people become the books they read and if "t... | Philip Nel takes a fascinating look into the k... | ['Philip Nel'] | A&C Black | 2005-01-01 | ['Biography & Autobiography'] |
| 3 | Dr. Seuss: American Icon | NaN | 4.0 | 1090713600 | Phlip Nel gives silly Seuss a serious treatment | Theodore Seuss Geisel (1904-1991), aka "D... | Philip Nel takes a fascinating look into the k... | ['Philip Nel'] | A&C Black | 2005-01-01 | ['Biography & Autobiography'] |
| 4 | Dr. Seuss: American Icon | NaN | 4.0 | 1107993600 | Good academic overview | Philip Nel - Dr. Seuss: American IconThis is b... | Philip Nel takes a fascinating look into the k... | ['Philip Nel'] | A&C Black | 2005-01-01 | ['Biography & Autobiography'] |
# Print a concise summary of the merged dataframe.
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 3000000 entries, 0 to 2999999 Data columns (total 11 columns): # Column Dtype --- ------ ----- 0 Title object 1 Price float64 2 review/score float64 3 review/time int64 4 review/summary object 5 review/text object 6 description object 7 authors object 8 publisher object 9 publishedDate object 10 categories object dtypes: float64(2), int64(1), object(8) memory usage: 274.7+ MB
# 'Price' was missing in 2518829 rows of the ratings table.
# Those gaps are filled with the mean of 'Price'; print that mean first.
print(df.loc[:, 'Price'].mean())
#21.76265587495903
21.76265587495903
# Replace every missing 'Price' with the column mean.
df['Price'] = df['Price'].fillna(value=df['Price'].mean())
# Recount the missing values: 'Price' no longer has any.
df.isna().sum()
Title 208 Price 0 review/score 0 review/time 0 review/summary 38 review/text 8 description 640225 authors 390634 publisher 782617 publishedDate 354581 categories 551498 dtype: int64
# Plot the boolean missing-value mask as a heatmap to show how the
# missing values are distributed across rows and columns.
plt.figure(figsize=(10, 10))
sns.heatmap(data=df.isna(), cbar=False)
<AxesSubplot:>
# Drop every row that still contains at least one missing value.
# 2071176 rows remained, which we judged to be enough data to analyze.
df = df.dropna(axis=0, how='any')
df.isna().sum()
Title 0 Price 0 review/score 0 review/time 0 review/summary 0 review/text 0 description 0 authors 0 publisher 0 publishedDate 0 categories 0 dtype: int64
# Print a concise summary of the dataframe after the rows with missing values were dropped.
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2071176 entries, 1 to 2999999 Data columns (total 11 columns): # Column Dtype --- ------ ----- 0 Title object 1 Price float64 2 review/score float64 3 review/time int64 4 review/summary object 5 review/text object 6 description object 7 authors object 8 publisher object 9 publishedDate object 10 categories object dtypes: float64(2), int64(1), object(8) memory usage: 189.6+ MB
# Re-plot the missing-value mask; the heatmap confirms that no missing values remain.
plt.figure(figsize=(10, 10))
sns.heatmap(data=df.isna(), cbar=False)
<AxesSubplot:>
#### Found abnormal values in "review/time".
# Box plot of 'review/time' grouped by 'review/score'.
# 'review/time' shows outliers for the scores 1, 2 and 5.
plt.figure(figsize=(12, 8))
sns.boxplot(data=df, x='review/score', y='review/time')
plt.xlabel('review/score')
plt.title('review/time per review/score')
plt.ylabel('review/time')
Text(0, 0.5, 'review/time')
# Inspect the outliers: their 'review/time' is -1, an impossible value, so these rows get removed.
df.loc[df["review/time"].lt(100)]
| Title | Price | review/score | review/time | review/summary | review/text | description | authors | publisher | publishedDate | categories | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 75745 | Julie and Julia: 365 Days, 524 Recipes, 1 Tiny... | 21.762656 | 2.0 | -1 | For once, the movie was better. | I purchased this book after seeing, and truly ... | Pushing thirty, living in a rundown apartment ... | ['Julie Powell'] | Viking | 2005 | ['Cookery, French'] |
| 75746 | Julie and Julia: 365 Days, 524 Recipes, 1 Tiny... | 21.762656 | 1.0 | -1 | Dazed and Confused | I had such high hopes for this book and I was ... | Pushing thirty, living in a rundown apartment ... | ['Julie Powell'] | Viking | 2005 | ['Cookery, French'] |
| 75747 | Julie and Julia: 365 Days, 524 Recipes, 1 Tiny... | 21.762656 | 2.0 | -1 | Disappointing...read My Life in France by Juli... | I eagerly snatched this book up when I saw it ... | Pushing thirty, living in a rundown apartment ... | ['Julie Powell'] | Viking | 2005 | ['Cookery, French'] |
| 75748 | Julie and Julia: 365 Days, 524 Recipes, 1 Tiny... | 21.762656 | 2.0 | -1 | Narcissistic Hipsters Should Cook, Not Write | After seeing the movie, I knew what I was in f... | Pushing thirty, living in a rundown apartment ... | ['Julie Powell'] | Viking | 2005 | ['Cookery, French'] |
| 75749 | Julie and Julia: 365 Days, 524 Recipes, 1 Tiny... | 21.762656 | 1.0 | -1 | "Taste"-less | Poor Julia Child, she deserved a better tribut... | Pushing thirty, living in a rundown apartment ... | ['Julie Powell'] | Viking | 2005 | ['Cookery, French'] |
| 75750 | Julie and Julia: 365 Days, 524 Recipes, 1 Tiny... | 21.762656 | 1.0 | -1 | Meanspirited woman | The writing was okayish... But these details f... | Pushing thirty, living in a rundown apartment ... | ['Julie Powell'] | Viking | 2005 | ['Cookery, French'] |
| 75751 | Julie and Julia: 365 Days, 524 Recipes, 1 Tiny... | 21.762656 | 1.0 | -1 | Leaves a bitter taste... | Julie and Julia looked so charming, and starte... | Pushing thirty, living in a rundown apartment ... | ['Julie Powell'] | Viking | 2005 | ['Cookery, French'] |
| 75752 | Julie and Julia: 365 Days, 524 Recipes, 1 Tiny... | 21.762656 | 2.0 | -1 | Leave out the swearing please!!!!! | This book would have been ten times better (an... | Pushing thirty, living in a rundown apartment ... | ['Julie Powell'] | Viking | 2005 | ['Cookery, French'] |
| 75753 | Julie and Julia: 365 Days, 524 Recipes, 1 Tiny... | 21.762656 | 1.0 | -1 | waste of time and money | A book as messy and nasty as some of the autho... | Pushing thirty, living in a rundown apartment ... | ['Julie Powell'] | Viking | 2005 | ['Cookery, French'] |
| 75754 | Julie and Julia: 365 Days, 524 Recipes, 1 Tiny... | 21.762656 | 2.0 | -1 | disappointing | I love both cooking and reading blogs, so I th... | Pushing thirty, living in a rundown apartment ... | ['Julie Powell'] | Viking | 2005 | ['Cookery, French'] |
| 2152018 | McKeachie's Teaching Tips, Strategies Research... | 21.762656 | 5.0 | -1 | Pretty good. | Pretty good at providing me with the fundament... | From the Publisher: This indispensable handboo... | ['Wilbert James McKeachie', 'Marilla Svinicki'... | Wadsworth Publishing Company | 2006 | ['Education'] |
# Keep only the rows whose 'review/time' is not the invalid value -1.
df = df.loc[df['review/time'].ne(-1)]
# Redraw the box plot of 'review/time' per 'review/score';
# it shows that the outliers were removed successfully.
plt.figure(figsize=(12, 8))
sns.boxplot(data=df, x='review/score', y='review/time')
plt.xlabel('review/score')
plt.title('review/time per review/score')
plt.ylabel('review/time')
Text(0, 0.5, 'review/time')
#### Found abnormal values in "publishedDate".
# Create 'publishedDate_year' from the first four characters of 'publishedDate'.
# assign() returns a new dataframe instead of writing a column into `df`, which at
# this point is the result of a boolean filter; writing into such a frame directly
# can trigger pandas' SettingWithCopyWarning.
df = df.assign(publishedDate_year=df['publishedDate'].str[:4])
# The new column contains values that are not plausible years ("19??", "199?", "101-", "2030").
# Inspect each group before removing it, starting with "19??".
df[df['publishedDate_year'] == '19??']
| Title | Price | review/score | review/time | review/summary | review/text | description | authors | publisher | publishedDate | categories | publishedDate_year | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 296184 | Within a budding grove, | 21.762656 | 3.0 | 1265414400 | Say Hello to the Princess of Luxembourg | Proust is an odd one -- an essayist and memoir... | “A l’ombre des jeunes fi lles en fl eurs” est ... | ['Proust M.'] | Рипол Классик | 19?? | ['Fiction'] | 19?? |
| 296185 | Within a budding grove, | 21.762656 | 5.0 | 978566400 | Stunning! | Without a doubt, this is the best piece of lit... | “A l’ombre des jeunes fi lles en fl eurs” est ... | ['Proust M.'] | Рипол Классик | 19?? | ['Fiction'] | 19?? |
| 296186 | Within a budding grove, | 21.762656 | 5.0 | 1330128000 | Seascape with a frieze of boys... | Reading Proust is like drinking good tea. You ... | “A l’ombre des jeunes fi lles en fl eurs” est ... | ['Proust M.'] | Рипол Классик | 19?? | ['Fiction'] | 19?? |
| 296187 | Within a budding grove, | 21.762656 | 5.0 | 1239840000 | good edition of a classic | Though Proust is not for the casual reader, th... | “A l’ombre des jeunes fi lles en fl eurs” est ... | ['Proust M.'] | Рипол Классик | 19?? | ['Fiction'] | 19?? |
| 296188 | Within a budding grove, | 21.762656 | 5.0 | 1215820800 | Philosophy as narrative | Volume two of Le Proust's great work is a sens... | “A l’ombre des jeunes fi lles en fl eurs” est ... | ['Proust M.'] | Рипол Классик | 19?? | ['Fiction'] | 19?? |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2655929 | Melodious Etudes for Trombone Book 1 | 21.762656 | 5.0 | 916272000 | Outstanding Teaching of the "Basics"!!! | This book teaches basic skills in which any st... | 120 Melodious Etudes for Trombone, Book 1. Fro... | ['Joannes Rochut'] | Ravenio Books | 19?? | ['Music'] | 19?? |
| 2655930 | Melodious Etudes for Trombone Book 1 | 21.762656 | 5.0 | 1340150400 | music book | Book is great! Just what my son needed. The pa... | 120 Melodious Etudes for Trombone, Book 1. Fro... | ['Joannes Rochut'] | Ravenio Books | 19?? | ['Music'] | 19?? |
| 2655931 | Melodious Etudes for Trombone Book 1 | 21.762656 | 5.0 | 1318982400 | O1594 - Melodious Etudes for Trombone Book 1 (... | This is a great book to work out of, I love it... | 120 Melodious Etudes for Trombone, Book 1. Fro... | ['Joannes Rochut'] | Ravenio Books | 19?? | ['Music'] | 19?? |
| 2655932 | Melodious Etudes for Trombone Book 1 | 21.762656 | 5.0 | 1293494400 | Best Legato Studies Ever | This Book will help you develop the technique ... | 120 Melodious Etudes for Trombone, Book 1. Fro... | ['Joannes Rochut'] | Ravenio Books | 19?? | ['Music'] | 19?? |
| 2655933 | Melodious Etudes for Trombone Book 1 | 21.762656 | 5.0 | 1286323200 | Melodious Etudes for Trombone Book 1 | This is an excellent, well written skill book.... | 120 Melodious Etudes for Trombone, Book 1. Fro... | ['Joannes Rochut'] | Ravenio Books | 19?? | ['Music'] | 19?? |
81 rows × 12 columns
# Rows whose derived year is the placeholder "199?".
df.loc[df['publishedDate_year'].eq('199?')]
| Title | Price | review/score | review/time | review/summary | review/text | description | authors | publisher | publishedDate | categories | publishedDate_year | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 200828 | The Iliad: The Story of Achilles | 21.762656 | 4.0 | 1208131200 | One of our first war novels | One of our first war novels: the Achains and t... | This new, modern translation of The Iliad is f... | ['Homer'] | Signet Book | 199? | ['Epic poetry, Greek'] | 199? |
| 2919171 | Circular breathing for the wind performer | 21.762656 | 4.0 | 1306108800 | /you can find this information online | I was disappointed when I received this book. ... | Author Kynaston discusses the technique of cir... | ['Trent Kynaston'] | Alfred Music | 199? | ['Music'] | 199? |
| 2919172 | Circular breathing for the wind performer | 21.762656 | 3.0 | 1261958400 | So 10.95 for this ? | Although, of course, extremely useful, this is... | Author Kynaston discusses the technique of cir... | ['Trent Kynaston'] | Alfred Music | 199? | ['Music'] | 199? |
| 2994104 | The Iliad: The Story of Achilles | 21.762656 | 5.0 | 927417600 | The pride of Agamemnon and Achilles. | The version of the "Iliad" that I re... | This new, modern translation of The Iliad is f... | ['Homer'] | Signet Book | 199? | ['Epic poetry, Greek'] | 199? |
# Rows whose derived year is "101-" (from a 'publishedDate' of the form 101-01-01).
df.loc[df['publishedDate_year'].eq('101-')]
| Title | Price | review/score | review/time | review/summary | review/text | description | authors | publisher | publishedDate | categories | publishedDate_year | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 780252 | One wonderful night: A Romance of New York, | 21.762656 | 1.0 | 1349654400 | Not worth reading | Did not care for it. Very old fashioned and to... | Lady Hermione flees to New York to escape marr... | ['Louis Tracy'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 780253 | One wonderful night: A Romance of New York, | 21.762656 | 3.0 | 1351468800 | One Wonderful Night A Romance of New York | This book was hard to follow at times. It was ... | Lady Hermione flees to New York to escape marr... | ['Louis Tracy'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 780254 | One wonderful night: A Romance of New York, | 21.762656 | 3.0 | 1350950400 | one wonderful night a romance in new york | this was an interesting book set in a early ag... | Lady Hermione flees to New York to escape marr... | ['Louis Tracy'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 1639902 | Wild Sorceress | 18.000000 | 3.0 | 1121644800 | Story 5 stars, Writting 2 stars: | I give the story line 5 stars but the writing ... | In a world where hostile nations wield magic i... | ['Margaret L. Carter', 'Leslie Roy Carter'] | Writers Exchange E-Publishing | 101-01-01 | ['Fiction'] | 101- |
| 1639903 | Wild Sorceress | 18.000000 | 5.0 | 1313020800 | A Delightful Read | This fantasy novel features a very original sy... | In a world where hostile nations wield magic i... | ['Margaret L. Carter', 'Leslie Roy Carter'] | Writers Exchange E-Publishing | 101-01-01 | ['Fiction'] | 101- |
| 1645910 | "Charge that to my account," and other Gospel ... | 21.762656 | 5.0 | 1211760000 | One of the best presentations of salvation by ... | This little book shows the the gospel and salv... | We are "accepted in the beloved." The blessed ... | ['H.A. Ironside'] | Solid Christian Books | 101-01-01 | ['Religion'] | 101- |
| 1645911 | "Charge that to my account," and other Gospel ... | 21.762656 | 5.0 | 1211760000 | One of the best presentations of salvation by ... | This little book shows the the gospel and salv... | We are "accepted in the beloved." The blessed ... | ['H.A. Ironside'] | Solid Christian Books | 101-01-01 | ['Religion'] | 101- |
| 1645912 | "Charge that to my account," and other Gospel ... | 21.762656 | 5.0 | 1208995200 | One of the best presentations of salvation by ... | This little book shows the the gospel and salv... | We are "accepted in the beloved." The blessed ... | ['H.A. Ironside'] | Solid Christian Books | 101-01-01 | ['Religion'] | 101- |
| 1689407 | Lore Volume 1 (v. 1) | 21.762656 | 5.0 | 1148083200 | Something beautifully new! | I remember when I first picked up a single Lor... | This is the first Volume of Demonology And Dev... | ['Moncure Daniel Conway'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 1689408 | Lore Volume 1 (v. 1) | 21.762656 | 5.0 | 1138924800 | Lore | I picked this book up a little while ago knowi... | This is the first Volume of Demonology And Dev... | ['Moncure Daniel Conway'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 1689409 | Lore Volume 1 (v. 1) | 21.762656 | 4.0 | 1150070400 | Poise between extremes | I mean that poise in a lot of ways. First, the... | This is the first Volume of Demonology And Dev... | ['Moncure Daniel Conway'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 2036777 | The book of talismans, amulets, and zodiacal gems | 21.762656 | 5.0 | 1304467200 | Excellent explanatory and directive! | This is an easy to read encyclopedia of symbol... | With 10 illustrative Plates. This is a study o... | ['William Thomas and Kate Pavitt'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 2036778 | The book of talismans, amulets, and zodiacal gems | 21.762656 | 4.0 | 1347321600 | a worthy addition | If you are quite like me and love all things t... | With 10 illustrative Plates. This is a study o... | ['William Thomas and Kate Pavitt'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 2036779 | The book of talismans, amulets, and zodiacal gems | 21.762656 | 4.0 | 1309132800 | interesting public domain book | from 1922 an amazing collection of info on sym... | With 10 illustrative Plates. This is a study o... | ['William Thomas and Kate Pavitt'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 2036780 | The book of talismans, amulets, and zodiacal gems | 21.762656 | 3.0 | 1356566400 | It's ok. | This book has great explanation of each sign i... | With 10 illustrative Plates. This is a study o... | ['William Thomas and Kate Pavitt'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 2036781 | The book of talismans, amulets, and zodiacal gems | 21.762656 | 5.0 | 1354320000 | Great! | This is a book I have been truly waiting for. ... | With 10 illustrative Plates. This is a study o... | ['William Thomas and Kate Pavitt'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 2036782 | The book of talismans, amulets, and zodiacal gems | 21.762656 | 4.0 | 1359158400 | Good Information | The book has lots of good information. The onl... | With 10 illustrative Plates. This is a study o... | ['William Thomas and Kate Pavitt'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 2036783 | The book of talismans, amulets, and zodiacal gems | 21.762656 | 5.0 | 1356912000 | Very informative | Though an older book on the subject; it is ver... | With 10 illustrative Plates. This is a study o... | ['William Thomas and Kate Pavitt'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 2036784 | The book of talismans, amulets, and zodiacal gems | 21.762656 | 1.0 | 1355961600 | Poorly organized and written | I found this book poorly organized and written... | With 10 illustrative Plates. This is a study o... | ['William Thomas and Kate Pavitt'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 2036785 | The book of talismans, amulets, and zodiacal gems | 21.762656 | 2.0 | 1354665600 | Modern books beat this one | Modern books on this subject, which I approach... | With 10 illustrative Plates. This is a study o... | ['William Thomas and Kate Pavitt'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 2036786 | The book of talismans, amulets, and zodiacal gems | 21.762656 | 5.0 | 1354147200 | Well done | I really have no major complaints; it's a good... | With 10 illustrative Plates. This is a study o... | ['William Thomas and Kate Pavitt'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 2036787 | The book of talismans, amulets, and zodiacal gems | 21.762656 | 4.0 | 1354060800 | INTERESTING | There is a lot of information in this book, I ... | With 10 illustrative Plates. This is a study o... | ['William Thomas and Kate Pavitt'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 2036788 | The book of talismans, amulets, and zodiacal gems | 21.762656 | 4.0 | 1352419200 | THE BOOK OF TALISMANS, AMULETS AND ZODIACAL GEMS | This book is well written. It contains insight... | With 10 illustrative Plates. This is a study o... | ['William Thomas and Kate Pavitt'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 2036789 | The book of talismans, amulets, and zodiacal gems | 21.762656 | 3.0 | 1350777600 | not bad, not great | I downloaded this free sample of the book of a... | With 10 illustrative Plates. This is a study o... | ['William Thomas and Kate Pavitt'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 2036790 | The book of talismans, amulets, and zodiacal gems | 21.762656 | 4.0 | 1333238400 | Informative | I skimmed the information in this book. It was... | With 10 illustrative Plates. This is a study o... | ['William Thomas and Kate Pavitt'] | BEYOND BOOKS HUB | 101-01-01 | ['Social Science'] | 101- |
| 2069642 | The Bathtub Is Overflowing but I Feel Drained:... | 21.762656 | 5.0 | 1186358400 | A must for all moms! | This is an excellent book for any mom looking ... | PLEASE NOTE: This is a summary and analysis of... | ['ZIP Reads'] | ZIP Reads | 101-01-01 | ['Self-Help'] | 101- |
| 2069643 | The Bathtub Is Overflowing but I Feel Drained:... | 21.762656 | 5.0 | 1259020800 | Amazing! | I just finished reading this book and I loved ... | PLEASE NOTE: This is a summary and analysis of... | ['ZIP Reads'] | ZIP Reads | 101-01-01 | ['Self-Help'] | 101- |
| 2069644 | The Bathtub Is Overflowing but I Feel Drained:... | 21.762656 | 5.0 | 1223251200 | LOVE THIS!!! | My husband just bought me this book after seei... | PLEASE NOTE: This is a summary and analysis of... | ['ZIP Reads'] | ZIP Reads | 101-01-01 | ['Self-Help'] | 101- |
| 2069645 | The Bathtub Is Overflowing but I Feel Drained:... | 21.762656 | 5.0 | 1215648000 | Gives Mom A Quick Refreshing | I bought this book for myself, and am now purc... | PLEASE NOTE: This is a summary and analysis of... | ['ZIP Reads'] | ZIP Reads | 101-01-01 | ['Self-Help'] | 101- |
| 2069646 | The Bathtub Is Overflowing but I Feel Drained:... | 21.762656 | 5.0 | 1286928000 | great book | I am reading this book a chapter at a time wit... | PLEASE NOTE: This is a summary and analysis of... | ['ZIP Reads'] | ZIP Reads | 101-01-01 | ['Self-Help'] | 101- |
| 2069647 | The Bathtub Is Overflowing but I Feel Drained:... | 21.762656 | 5.0 | 1254528000 | Hilariously uplifting and informative :0) | I am absolutely in LOVE with this book! Everyt... | PLEASE NOTE: This is a summary and analysis of... | ['ZIP Reads'] | ZIP Reads | 101-01-01 | ['Self-Help'] | 101- |
| 2115497 | His Family | 21.762656 | 3.0 | 1245974400 | A good heart | When you read this, it is important to conside... | His Family by American writer Ernest Poole was... | ['Ernest Poole'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 2115498 | His Family | 21.762656 | 5.0 | 1218931200 | Changes in New York City life in the Early 20t... | One of my favorite Pulitzer winning pieces of ... | His Family by American writer Ernest Poole was... | ['Ernest Poole'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 2115499 | His Family | 21.762656 | 5.0 | 1357689600 | love it | Great book, I would recommend it to anyone who... | His Family by American writer Ernest Poole was... | ['Ernest Poole'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 2115500 | His Family | 21.762656 | 1.0 | 1281916800 | starts on chapter two | While I am sure the book is good, this copy th... | His Family by American writer Ernest Poole was... | ['Ernest Poole'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 2115501 | His Family | 21.762656 | 3.0 | 1027987200 | A Nostalgic Glimpse of Life | Ernest Poole's 'His Family' focuses on the lat... | His Family by American writer Ernest Poole was... | ['Ernest Poole'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 2115502 | His Family | 21.762656 | 3.0 | 1003622400 | The winner of the first Pulitzer Prize for fic... | Until coming across his name in a list of Puli... | His Family by American writer Ernest Poole was... | ['Ernest Poole'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 2115503 | His Family | 21.762656 | 5.0 | 966729600 | Lovely glimpse of the past | Poole's book tells the story of aging patriarc... | His Family by American writer Ernest Poole was... | ['Ernest Poole'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 2115504 | His Family | 21.762656 | 3.0 | 1125273600 | Pre-modern Modern | This novel has the distinction of winning the ... | His Family by American writer Ernest Poole was... | ['Ernest Poole'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 2115505 | His Family | 21.762656 | 4.0 | 1100995200 | The Dawning of the 20th Century | This is the very first of the Pulitzer Prize w... | His Family by American writer Ernest Poole was... | ['Ernest Poole'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 2115506 | His Family | 21.762656 | 3.0 | 1201392000 | The Changing Society Of The Early 1900s | "His Family" by Ernest Poole was published in ... | His Family by American writer Ernest Poole was... | ['Ernest Poole'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 2115507 | His Family | 21.762656 | 4.0 | 1359504000 | Roger Gale | In 1918, Ernest Poole's novel, "His Family" be... | His Family by American writer Ernest Poole was... | ['Ernest Poole'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 2115508 | His Family | 21.762656 | 4.0 | 1303776000 | Earnest and sympathetic novel | His Family is a very fine novel about widower ... | His Family by American writer Ernest Poole was... | ['Ernest Poole'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 2115509 | His Family | 21.762656 | 3.0 | 1274918400 | History repeats itself | This is the first fictional book that won the ... | His Family by American writer Ernest Poole was... | ['Ernest Poole'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 2115510 | His Family | 21.762656 | 4.0 | 1346284800 | Excellent Study of the Perils of Middle Age | It could just be that the main character is ab... | His Family by American writer Ernest Poole was... | ['Ernest Poole'] | BEYOND BOOKS HUB | 101-01-01 | ['Fiction'] | 101- |
| 2717219 | Echine (French Edition) | 21.762656 | 5.0 | 962928000 | Echine | My favorite book from Djian, its part of the B... | Extrait : "Avant de raconter un des plus effro... | ['OCTAVE MIRBEAU'] | BEYOND BOOKS HUB | 101-01-01 | ['Literary Collections'] | 101- |
# Rows whose derived year is 2030.
df.loc[df['publishedDate_year'].eq('2030')]
| Title | Price | review/score | review/time | review/summary | review/text | description | authors | publisher | publishedDate | categories | publishedDate_year | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1384321 | A Wealth of Wisdom: Legendary African American... | 11.18 | 5.0 | 1074384000 | Wealth of Knowledge - History's Truths | Thanks to Dr. Cosby and Renee Pouissant we now... | The wisdom of our elders is our most valuable ... | ['Camille Cosby', 'Rene Poussaint'] | Simon and Schuster | 2030-12-31 | ['Biography & Autobiography'] | 2030 |
| 1384322 | A Wealth of Wisdom: Legendary African American... | 11.18 | 5.0 | 1076544000 | Couldn't Put It Down | This is one of those rare books I simply could... | The wisdom of our elders is our most valuable ... | ['Camille Cosby', 'Rene Poussaint'] | Simon and Schuster | 2030-12-31 | ['Biography & Autobiography'] | 2030 |
| 1384323 | A Wealth of Wisdom: Legendary African American... | 11.18 | 4.0 | 1075939200 | Enjoyed It!! | I enjoy reading this book. Really. The reason ... | The wisdom of our elders is our most valuable ... | ['Camille Cosby', 'Rene Poussaint'] | Simon and Schuster | 2030-12-31 | ['Biography & Autobiography'] | 2030 |
| 1384324 | A Wealth of Wisdom: Legendary African American... | 11.18 | 5.0 | 1077494400 | Great Advise | This is a wonderful book of advice from wise A... | The wisdom of our elders is our most valuable ... | ['Camille Cosby', 'Rene Poussaint'] | Simon and Schuster | 2030-12-31 | ['Biography & Autobiography'] | 2030 |
| 1384325 | A Wealth of Wisdom: Legendary African American... | 11.18 | 5.0 | 1355702400 | Women's Wit | So many women sharing views through their own ... | The wisdom of our elders is our most valuable ... | ['Camille Cosby', 'Rene Poussaint'] | Simon and Schuster | 2030-12-31 | ['Biography & Autobiography'] | 2030 |
# Remove the rows with an implausible 'publishedDate_year' in a single pass.
# .copy() makes the filtered result an independent dataframe, so the column
# assignment below does not trigger pandas' SettingWithCopyWarning.
df = df[~df['publishedDate_year'].isin(['101-', '2030', '199?', '19??'])].copy()
# Convert the year from object (string) to int.
# NOTE: astype(int) raises ValueError if any non-numeric value is still present.
df['publishedDate_year'] = df['publishedDate_year'].astype(int)
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2071029 entries, 1 to 2999999 Data columns (total 12 columns): # Column Dtype --- ------ ----- 0 Title object 1 Price float64 2 review/score float64 3 review/time int64 4 review/summary object 5 review/text object 6 description object 7 authors object 8 publisher object 9 publishedDate object 10 categories object 11 publishedDate_year int64 dtypes: float64(2), int64(2), object(8) memory usage: 205.4+ MB
# Draw a random sample of 10,000 rows to build the word cloud from
df_rating_sample10000 = df.sample(n=10000)
# Stop words to leave out of the cloud (the list shipped with wordcloud)
stops = set(STOPWORDS)
# Join every sampled review text into one space-separated string
textt = " ".join(df_rating_sample10000["review/text"])
wordcloud = WordCloud(stopwords=stops)
wordcloud.generate(textt)
# Save the rendered cloud as an image file
wordcloud.to_file("review.png")
<wordcloud.wordcloud.WordCloud at 0x7f908b8631f0>
# Reopen the saved word cloud; the resized 900x400 copy is the cell's result
im = Image.open("review.png")
display_size = (900, 400)
im.resize(display_size)
# Number of reviews per score; a score of 5 is the most frequent.
plt.figure(figsize=(12, 8))
sns.countplot(data=df, x='review/score')
<AxesSubplot:xlabel='review/score', ylabel='count'>
# We wondered whether the length of 'review/text' is related to the rating,
# so we add a 'text length' column holding the number of characters in each
# 'review/text'.
# .str.len() is vectorised (no per-row Python call) and yields NaN for a
# missing review instead of raising TypeError the way .apply(len) does.
df["text length"] = df["review/text"].str.len()
df.head()
| Title | Price | review/score | review/time | review/summary | review/text | description | authors | publisher | publishedDate | categories | publishedDate_year | text length | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Dr. Seuss: American Icon | 21.762656 | 5.0 | 1095724800 | Really Enjoyed It | I don't care much for Dr. Seuss but after read... | Philip Nel takes a fascinating look into the k... | ['Philip Nel'] | A&C Black | 2005-01-01 | ['Biography & Autobiography'] | 2005 | 1423 |
| 2 | Dr. Seuss: American Icon | 21.762656 | 5.0 | 1078790400 | Essential for every personal and Public Library | If people become the books they read and if "t... | Philip Nel takes a fascinating look into the k... | ['Philip Nel'] | A&C Black | 2005-01-01 | ['Biography & Autobiography'] | 2005 | 1752 |
| 3 | Dr. Seuss: American Icon | 21.762656 | 4.0 | 1090713600 | Phlip Nel gives silly Seuss a serious treatment | Theodore Seuss Geisel (1904-1991), aka "D... | Philip Nel takes a fascinating look into the k... | ['Philip Nel'] | A&C Black | 2005-01-01 | ['Biography & Autobiography'] | 2005 | 3662 |
| 4 | Dr. Seuss: American Icon | 21.762656 | 4.0 | 1107993600 | Good academic overview | Philip Nel - Dr. Seuss: American IconThis is b... | Philip Nel takes a fascinating look into the k... | ['Philip Nel'] | A&C Black | 2005-01-01 | ['Biography & Autobiography'] | 2005 | 1542 |
| 5 | Dr. Seuss: American Icon | 21.762656 | 4.0 | 1127174400 | One of America's greatest creative talents | "Dr. Seuss: American Icon" by Philip Nel is a ... | Philip Nel takes a fascinating look into the k... | ['Philip Nel'] | A&C Black | 2005-01-01 | ['Biography & Autobiography'] | 2005 | 1975 |
# Mean of every numeric column ('Price', 'review/time', 'publishedDate_year',
# 'text length') for each 'review/score' value.
# numeric_only=True is required because df also holds object columns (Title,
# review/text, ...): recent pandas raises TypeError when asked to average
# them, while older versions dropped them silently.
score_gb = df.groupby('review/score').mean(numeric_only=True)
score_gb
| Price | review/time | publishedDate_year | text length | |
|---|---|---|---|---|
| review/score | ||||
| 1.0 | 21.653179 | 1.118128e+09 | 2004.820585 | 793.900548 |
| 2.0 | 21.711868 | 1.121717e+09 | 2005.093863 | 928.639748 |
| 3.0 | 21.673666 | 1.136735e+09 | 2005.073905 | 986.441841 |
| 4.0 | 21.678704 | 1.135118e+09 | 2004.789454 | 946.258080 |
| 5.0 | 21.556469 | 1.128884e+09 | 2004.798769 | 756.711431 |
# Heatmap of the pairwise correlations between the columns of score_gb
score_corr = score_gb.corr()
sns.heatmap(score_corr, annot=True, cmap='coolwarm')
<AxesSubplot:>
# Histogram of publication years from 1900 to 2023, in bins of width 0.5
year_hist = go.Histogram(
    x=df['publishedDate_year'],
    name='publishedDate_year',
    xbins=dict(start=1900, end=2023, size=0.5),
    opacity=0.5,
)
fig = go.Figure(data=[year_hist])
fig.update_layout(barmode='overlay')
fig.show()
#As the histogram shows, many of the books were published in the 2000s.